In [1]:
import matplotlib.pyplot as plt

plt.rcParams.update({'figure.figsize': (20, 8)})
In [2]:
from models_gaussian_2d import *
from eval_utils import *
import time
2022-09-22 15:05:55.886527: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-22 15:05:55.886564: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
WARNING:tensorflow:From /Ziob/kabalce/miniconda3/envs/hmm/lib/python3.10/site-packages/tensorflow/python/compat/v2_compat.py:107: disable_resource_variables (from tensorflow.python.ops.variable_scope) is deprecated and will be removed in a future version.
Instructions for updating:
non-resource variables are not supported in the long term
In [3]:
from sklearn.decomposition import PCA
from celluloid import Camera
import matplotlib.cm as cm
from IPython.display import display, Markdown, Latex, HTML
In [4]:
import os
# os.chdir('./wodociagi')
In [5]:
import pandas as pd
import numpy as np
In [6]:
plt.rcParams.update({'figure.figsize': (20, 8)})  # redundant: the same figsize was already set in the first cell

Prepare Data¶

In [7]:
# Load raw hydraulics data; forward-fill gaps in the sensor readings.
# (The original called .ffill() a second time right after — redundant, removed.)
df_main = pd.read_excel('../../data/Dane_Uwr.xlsx', sheet_name='Surowe_hydraulika').ffill()
df_main.columns = ['mtime', 'P1', 'V1', 'Q1']
# First difference of the cumulative volume counter V1; the first sample has
# no predecessor, so its delta is 0 (same values as the original [0] + diffs).
df_main["V_delta"] = df_main.V1.diff().fillna(0).values
In [8]:
# Visualise the raw cumulative volume and its per-sample increments.
for column, title in [("V1", "Original V1"), ("V_delta", "Original V delta")]:
    plt.plot(df_main["mtime"], df_main[column])
    plt.title(title)
    plt.show()
In [9]:
# Zero-out implausible jumps in the volume delta (counter resets / sensor glitches).
df_main.loc[(df_main.V_delta.abs() > 1e+3), "V_delta"] = 0

plt.plot(df_main["mtime"], df_main["V_delta"])
plt.title("V delta without outliers")
plt.show()

# Aggregate deltas into hourly sums (timestamps rounded to the hour).
# NOTE(review): assumes raw sampling is sub-hourly — confirm against data source.
df_main = pd.concat([df_main.V_delta, df_main.mtime.dt.round("H")], axis=1).groupby("mtime").sum().reset_index()

plt.plot(df_main["mtime"], df_main["V_delta"])
plt.title("V delta hourly concatenation")  # typo fix: was "ourly"
plt.show()
In [10]:
# Restrict the analysis to 2019.
YEAR_MASK = (df_main.mtime.dt.year == 2019)

plt.plot(df_main.loc[YEAR_MASK, "mtime"], df_main.loc[YEAR_MASK, "V_delta"])

# Double centred rolling mean approximates the slow seasonal trend.
# NOTE(review): the window widths (24*42*6 and 24*7*6) look sized for 10-min
# data, but the series is hourly after aggregation — confirm intended widths.
seasonal_changes = df_main.V_delta.rolling(24 * 42 * 6, center=True, min_periods=2).mean().rolling(24 * 7 * 6, center=True, min_periods=2).mean()[YEAR_MASK]

plt.plot(df_main.loc[YEAR_MASK, "mtime"], seasonal_changes, color="red")
plt.title("V delta hourly concatenation with seasonal changes - 07.2019 ")  # typo fix: was "ourly"
plt.show()

# De-seasonalised signal: hourly deltas minus the seasonal trend.
data = df_main.V_delta[YEAR_MASK] - seasonal_changes

plt.plot(data)
plt.title("Data")

# Split into fixed-size sequences; the remainder forms the final, shorter one.
# (The original mixed 24*6*7 and 24*7*6 — same value, unified here.)
# NOTE(review): if data length divides evenly, the last entry would be 0 — confirm downstream handles that.
WEEK = 24 * 7 * 6
lengths = np.array([WEEK for _ in range(data.shape[0] // WEEK)] + [
    data.shape[0] - (data.shape[0] // WEEK) * WEEK])
Y_true = data.values.reshape(-1, 1)
In [11]:
# Plot the deviations with grey vertical lines marking day boundaries.
day_marks = 24 * np.arange(Y_true.shape[0] // 24)
plt.plot(Y_true)
plt.title("Hourly deviations from seasonality")
plt.vlines(day_marks, -10, 7.5, color='grey')
plt.show()
In [12]:
# Preview the series after a centred 6-sample rolling mean.
preview = pd.Series(Y_true[:, 0]).rolling(6, center=True, min_periods=2).mean()
plt.plot(preview)
plt.title("Hourly deviations from seasonality")
plt.vlines(24 * np.arange(Y_true.shape[0] // 24), -100, 100, color='grey')
plt.show()
In [13]:
# Replace Y_true by its centred 6-sample rolling mean; shape is preserved.
print(Y_true.shape)
smoothed = pd.Series(Y_true[:, 0]).rolling(6, center=True, min_periods=2).mean()
Y_true = smoothed.to_numpy().reshape(-1, 1)
print(Y_true.shape)
(8760, 1)
(8760, 1)

Build model¶

In [14]:
# Model hyper-parameters.
# NOTE(review): the original first set lr=0.03271365590433669 and ITER=714450
# and immediately overwrote them below; the dead assignments are removed.
n = 10   # number of hidden states
l = 4    # embedding dimension (l_uz)
lr = 0.1
ITER = 70000

TOLERANCE = 1e-4  # convergence tolerance for the logging monitor

def em_scheduler(max_lr, it, total_iters=None):
    """Cosine-shaped learning-rate schedule for the co-occurrence M-step.

    Parameters
    ----------
    max_lr : float
        Peak learning rate scaled by the cosine factor.
    it : int
        Current iteration number.
    total_iters : int, optional
        Total number of iterations; defaults to the global ITER so existing
        two-argument calls behave exactly as before.

    Returns
    -------
    float
        Scaled learning rate: a slow cosine ramp for the first two thirds of
        training, then a cubed-cosine decay for the final third.
    """
    total = ITER if total_iters is None else total_iters
    if it <= np.ceil(2 * total / 3):
        return max_lr * np.cos((np.ceil(total * 2 / 3) - it / 2) / total * np.pi * .67)
    else:
        return max_lr * np.cos(3 * (np.ceil(total * 2 / 3) - it) * np.pi * .33 / total) ** 3


# M-step configuration passed to the dense HMM.
# NOTE(review): name kept as `mstep_cofig` (sic) — later cells reference it.
# The original defined this identical dict a second time after wandb_params;
# the duplicate has been removed.
mstep_cofig = {"cooc_lr": lr, "cooc_epochs": ITER, "l_uz": l,
               'loss_type': 'square', "scheduler": em_scheduler}

t = time.localtime()  # timestamp used to group wandb runs by day

true_values = None  # no ground-truth parameters available for this dataset

# Weights & Biases run configuration.
wandb_params = {
    "init": {
        "project": "gaussian-dense-hmm-wodociagi",
        "entity": "cirglaboratory",
        "save_code": True,
        "group": f"first-runs-2",
        "job_type": f"{t.tm_year}-{t.tm_mon}-{t.tm_mday}",
        "name": f"test3",
        "reinit": True
    },
    "config": {
        "n": n,
        "s": len(lengths),
        "T": lengths.max(),
        "model": None,
        "m": None,
        "l": None,
        "lr": lr,
        "em_epochs": None,
        "em_iter": None,
        "cooc_epochs": ITER,
        "simple_model": None
    }
}
In [15]:
# Monitor that logs convergence metrics to wandb during training.
log_cfg = {'metrics_after_convergence': True}
hmm_monitor = DenseHMMLoggingMonitor(
    tol=TOLERANCE, n_iter=0, verbose=True,
    wandb_log=True, wandb_params=wandb_params, true_vals=true_values,
    log_config=log_cfg)

# Dense Gaussian HMM trained via the co-occurrence optimisation scheme.
densehmm = GaussianDenseHMM(
    n, mstep_config=mstep_cofig,
    covariance_type='full', opt_schemes={"cooc"},
    logging_monitor=hmm_monitor,
    init_params="stmc", params="stmc", early_stopping=True)
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: kabalce (cirglaboratory). Use `wandb login --relogin` to force relogin
wandb version 0.13.3 is available! To upgrade, please run: $ pip install wandb --upgrade
Tracking run with wandb version 0.12.21
Run data is saved locally in /Ziob/kabalce/GausianDenseHMM/code_dense_hmm/wodociagi/wandb/run-20220922_150702-1rq3q0lj
Syncing run test3 to Weights & Biases (docs)
In [16]:
# Fit the dense HMM via the co-occurrence scheme and record wall-clock time.
start = time.perf_counter()
densehmm.fit_coocs(Y_true,lengths)
time_tmp = time.perf_counter() - start  # seconds spent in fit_coocs
2022-09-22 15:07:11.602083: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-09-22 15:07:11.602632: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.605438: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.609816: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.612441: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.615484: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcurand.so.10'; dlerror: libcurand.so.10: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.618686: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.621313: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusparse.so.11'; dlerror: libcusparse.so.11: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.625628: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2022-09-22 15:07:11.625644: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2022-09-22 15:07:11.646963: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-22 15:07:11.695360: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
         1     -111936.2803             +nan
         2     -109119.1431       +2817.1372
         3     -107760.4405       +1358.7026
         4     -106493.8307       +1266.6098
         5     -105024.9959       +1468.8348
         6     -103302.3239       +1722.6720
         7     -102025.0420       +1277.2819
         8     -101349.4184        +675.6236
         9     -101149.5445        +199.8739
        10     -101061.1022         +88.4423
        11     -101018.6675         +42.4348
        12     -100993.4459         +25.2216
        13     -100963.2177         +30.2282
        14     -100906.0119         +57.2058
        15     -100785.9029        +120.1091
        16     -100492.1321        +293.7708
        17      -99945.6941        +546.4380
        18      -99504.3104        +441.3837
        19      -98727.2713        +777.0391
        20      -97916.0223        +811.2490
        21      -97299.8312        +616.1911
        22      -96865.8197        +434.0115
        23      -96713.8580        +151.9617
        24      -96630.6961         +83.1620
        25      -96562.4897         +68.2064
        26      -96498.5590         +63.9306
        27      -96432.7724         +65.7866
        28      -96361.5641         +71.2083
        29      -96283.2469         +78.3172
        30      -96197.6848         +85.5621
        31      -96106.5033         +91.1815
        32      -96013.8802         +92.6231
        33      -95927.8293         +86.0509
        34      -95861.1785         +66.6508
        35      -95829.3068         +31.8717
        36      -95839.8128         -10.5059
        37      -95880.0889         -40.2762
        38      -95926.6728         -46.5839
        39      -95965.8657         -39.1929
        40      -95995.1739         -29.3082
        41      -96015.9843         -20.8104
        42      -96029.9209         -13.9366
        43      -96038.1488          -8.2278
        44      -96041.4660          -3.3173
        45      -96040.4551          +1.0109
        46      -96035.5763          +4.8789
        47      -96027.2163          +8.3599
        48      -96014.3791         +12.8372
        49      -95995.0550         +19.3241
        50      -95972.2024         +22.8526
        51      -95946.6646         +25.5378
        52      -95919.1981         +27.4665
        53      -95890.4704         +28.7276
        54      -95861.0618         +29.4086
        55      -95831.4699         +29.5919
        56      -95802.1174         +29.3525
        57      -95773.3607         +28.7567
        58      -95745.4983         +27.8624
        59      -95718.7795         +26.7189
        60      -95693.4102         +25.3693
        61      -95669.5583         +23.8519
        62      -95647.3567         +22.2016
        63      -95626.9049         +20.4518
        64      -95608.2697         +18.6352
        65      -95591.4858         +16.7839
        66      -95576.5561         +14.9296
        67      -95563.4529         +13.1033
        68      -95552.1189         +11.3340
        69      -95542.4706          +9.6483
        70      -95534.4007          +8.0699
In [17]:
# Stale aliases from earlier experiment runs, intentionally left commented out:
# densehmm_org_aparms = densehmm
# densehmm_100k = densehmm
# densehmm_200k = densehmm
# densehmm_700k = densehmm
# densehmm_500k = densehmm
In [58]:
 

Results¶

Embeddings movement¶

In [18]:
# Project each logged embedding snapshot onto 2-D PCA axes fitted on the
# final iterate, so trajectories can be animated in a fixed coordinate frame.
z_init = np.transpose(hmm_monitor.z[-1])
pca_z = PCA(n_components=2).fit(z_init)
z = [pca_z.transform(z_init)]
for snapshot in hmm_monitor.z:
    z.append(pca_z.transform(np.transpose(snapshot)))

z0 = list(hmm_monitor.z0)  # 1-D starting-state embeddings, no PCA needed

u_init = hmm_monitor.u[-1]
pca_u = PCA(n_components=2).fit(u_init)
u = [pca_u.transform(u_init)]
for snapshot in hmm_monitor.u:
    u.append(pca_u.transform(snapshot))
In [19]:
# Draw embeddings trajectories

def draw_embeddings(embeding_list, name="?"):
    fig = plt.figure(figsize=(5, 5))
    camera = Camera(fig)
    cmap = cm.rainbow(np.linspace(0, 1, len(embeding_list[0])))
    for z_el in embeding_list:
        if z_el.shape[1] > 1:
            plt.scatter(z_el[:, 0], z_el[:, 1], color=cmap)
        else:
            plt.scatter(np.arange(z_el.shape[0]), z_el, color=cmap)
        camera.snap()
    plt.title(f"Embaddings trajectory:  {name}")
    animation = camera.animate().to_html5_video()
    wandb.log({f"Embaddings trajectory:  {name}": wandb.Html(animation)})
    display(HTML(animation))
    plt.close()
In [20]:
# Animate the trajectory of each representation matrix.
draw_embeddings(z, "z")
draw_embeddings(z0, "z0")
draw_embeddings(u, "u")
Your browser does not support the video tag.
Your browser does not support the video tag.
Your browser does not support the video tag.

Embedding similarities¶

In [21]:
# Draw graph scaled with width and alpha
# Fetch the learned (u, z, z0) embedding matrices from the fitted model.
representation = densehmm.get_representations()
In [22]:
u_fin, z_fin, z0_fin = representation  # unpack final u, z and z0 embeddings
In [23]:
u_fin  # inspect the final u embedding matrix (one row per hidden state)
Out[23]:
array([[ 0.81522926,  0.7749844 , -1.36163476,  0.19171012],
       [ 1.25762801, -1.64764015, -0.86976148, -0.81340828],
       [-0.34090178, -0.36307474,  0.91574129, -0.88924059],
       [-0.61967726, -1.28221738, -1.29863423,  1.1147602 ],
       [-1.87944403, -2.08516514,  1.0882987 , -1.85575953],
       [ 1.54160007,  0.39915993, -0.87551868,  0.071946  ],
       [ 1.00168665, -1.1307586 , -2.46512821,  0.76002356],
       [-1.07714347, -1.33433337, -0.45162947,  1.11889856],
       [-0.09443906,  0.53315114,  0.00746519, -1.81783205],
       [-1.22671903,  0.27836264,  0.07269379, -0.77325455]])
In [24]:
z_fin  # inspect the final z embedding matrix (one column per hidden state)
Out[24]:
array([[ 0.7089109 ,  0.45454697,  1.07223965, -1.55627429,  0.56180282,
         1.28863812, -0.07946448, -0.17687036,  0.6124697 ,  0.01874523],
       [ 1.73107433, -0.25286488,  0.30963567,  0.09742843, -1.62417265,
         1.30218801, -1.41790256, -0.40837113,  0.31231638, -0.18306023],
       [-1.13546512,  1.74606951, -1.20980683, -1.55247084,  0.24461675,
         0.14658406, -0.91979523,  0.31339505,  0.72595757, -2.27086582],
       [-0.36310786,  0.10921562,  1.94817127,  0.75534236, -0.93796748,
        -0.04715727, -0.01200375,  1.42588812,  0.27495063, -0.07798396]])
In [25]:
z0_fin  # inspect the starting-state embedding vector
Out[25]:
array([[ 0.28449496],
       [ 0.17474797],
       [-1.46511687],
       [ 1.34018219]])
In [26]:
np.matmul(u_fin, z_fin)  # transition logits u·z before row-softmax
Out[26]:
array([[ 3.39595782, -2.18197754,  3.13488313,  1.0654902 , -1.31360689,
         1.85107642,  0.08648979, -0.61404356, -0.1944347 ,  2.95055236],
       [-0.67770261, -0.61921955,  0.30589518, -1.38186354,  3.93278332,
        -0.61404492,  3.04602284, -0.9819966 , -0.59938299,  2.36373632],
       [-1.58708038,  1.43868196, -3.31821233, -1.59817985,  1.45626088,
        -0.73592337, -0.28972607, -0.77240415,  0.09810587, -1.95010483],
       [-1.58913387, -2.10320109,  2.6813776 ,  3.69758074,  0.37112947,
        -2.71115596,  3.04839791,  1.8157609 , -1.41624037,  3.08519769],
       [-5.50381853,  1.3705367 , -7.59282452, -0.36950971,  4.33764941,
        -4.89016047,  2.12717414, -1.12109886, -1.52251731, -1.98018065],
       [ 2.75182936, -0.92106272,  2.97593048, -0.94670197, -0.06387891,
         2.37461604,  0.11596195, -0.60746502,  0.45303971,  1.93840214],
       [ 1.27577596, -3.48003598,  5.18690995,  2.73205042,  1.08340855,
        -0.57883783,  3.78199704,  0.59575009, -1.32026123,  5.76447951],
       [-2.96690035, -0.81858149,  1.15807731,  3.09262174,  0.40207894,
        -3.24456693,  2.379525  ,  2.18930373, -1.09667387,  1.16240585],
       [ 1.50756801, -0.3632431 , -3.48665831, -1.18575789,  0.78790782,
         0.65938352, -0.73349746, -2.79070564, -0.38572388,  0.02544024],
       [-0.18953453, -0.58551262, -2.82352352,  1.23930481, -0.39821345,
        -1.17119608, -0.35479196, -0.97649763, -0.82422524, -0.17872865]])
In [27]:
# PCA-reduced final snapshots taken from the trajectory lists built above.
z0_fin_red = z0[-1]
z_fin_red = z[-1]
u_fin_red = u[-1]
In [28]:
# Row-softmax of the logits u·z yields the trained transition matrix.
# The original computed np.exp(np.matmul(...)) twice and applied exp without
# stabilisation; here the logits are computed once and shifted by their row
# maximum (mathematically identical, numerically safer).
logits = np.matmul(u_fin, z_fin)
logits = logits - logits.max(axis=1, keepdims=True)
exp_logits = np.exp(logits)
A_trained = exp_logits / exp_logits.sum(axis=1, keepdims=True)
In [29]:
A_largest = A_trained * (A_trained == A_trained.max(axis=1).reshape(-1, 1))  # keep only each row's largest transition probability
In [30]:
A_largest
Out[30]:
array([[0.35506057, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.58791086,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.36101667,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.38015969, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.84803103,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.33842444, 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.56303925],
       [0.        , 0.        , 0.        , 0.43628568, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.3786148 , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.41953012, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ]])
In [31]:
import itertools
In [32]:
# Arrows from z points (sources) to u points (targets); arrow width is
# proportional to the learned transition probability A_trained[i, j].
plt.figure(figsize=(10, 10))

state_colors = cm.rainbow(np.linspace(0, 1, n))
for i in range(n):
    for j in range(n):
        dx = u_fin_red[j, 0] - z_fin_red[i, 0]
        dy = u_fin_red[j, 1] - z_fin_red[i, 1]
        plt.arrow(z_fin_red[i, 0], z_fin_red[i, 1], dx, dy, width=A_trained[i, j] / 15)

plt.scatter(z_fin_red[:, 0], z_fin_red[:, 1], color=state_colors)
plt.scatter(u_fin_red[:, 0], u_fin_red[:, 1], color=state_colors)

plt.title("Transpositions")
plt.show()
In [33]:
z_fin_red
Out[33]:
array([[-0.72267441,  1.63276854],
       [ 2.10379795, -0.08135393],
       [-0.87688568,  0.78568821],
       [-1.70206594, -0.68215358],
       [ 1.13645818, -1.44760401],
       [ 0.64435133,  1.58403377],
       [-0.32387269, -1.47386778],
       [ 0.38551427, -0.39773221],
       [ 1.0853405 ,  0.46823828],
       [-1.72996349, -0.38801729]])
In [34]:
uz_fin = np.concatenate([u_fin, np.transpose(z_fin)], axis=1)  # stack u beside z^T into one joint per-state representation
In [35]:
# Reduce the joint (u, z^T) representation to 2-D for plotting.
pca_uz = PCA(n_components=2)
uz_fin_red = pca_uz.fit_transform(uz_fin)
In [36]:
uz_fin_red
Out[36]:
array([[-2.29324113, -0.86019254],
       [ 0.09265259, -1.07771949],
       [ 0.78577611, -0.5595661 ],
       [-0.74000947,  2.60998982],
       [ 3.69374412, -0.08307797],
       [-1.90316739, -1.72143021],
       [-1.35671569,  1.64144196],
       [ 0.43200548,  1.44389191],
       [ 0.63241565, -1.94894777],
       [ 0.65653973,  0.55561038]])
In [37]:
# Directed graph over the joint-embedding points; arrow width and opacity
# both scale with the learned transition probability A_trained[i, j].
plt.figure(figsize=(10, 10))

for i, j in itertools.product(range(n), range(n)):
    if i == j:
        continue  # skip self-transitions, they would be zero-length arrows
    plt.arrow(uz_fin_red[i, 0], uz_fin_red[i, 1], uz_fin_red[j, 0] - uz_fin_red[i, 0], uz_fin_red[j, 1] - uz_fin_red[i, 1], width=(A_trained[i, j] / 1.5 + 0.33) * 0.06, alpha=A_trained[i, j] / 1.5 + 0.33)  #, color=cm.rainbow(A_trained[i, j] )

# The original drew this scatter twice with identical arguments; once suffices.
plt.scatter(uz_fin_red[:, 0], uz_fin_red[:, 1], color = cm.rainbow(np.linspace(0, 1, n)))

plt.title("Transpositions")
plt.show()
In [38]:
# Predicted hidden-state sequence, shaped (1, T) for imshow.
states = densehmm.predict(Y_true).reshape(1, -1)

interval = 7 * 24  # one week of hourly samples

# Ceil-divide so a trailing partial week is still shown, but no empty extra
# figure appears when the length divides evenly (the original `// interval + 1`
# produced one in that case).
n_windows = (Y_true.shape[0] + interval - 1) // interval
for i in range(n_windows):
    window = slice(i * interval, (i + 1) * interval)
    plt.figure(figsize=(10, 10))
    plt.plot(Y_true[window])
    # State band rendered behind the series, coloured by state index.
    plt.imshow(states[:, window], extent=(0, Y_true[window].shape[0], -100, 100), cmap=cm.rainbow)
    plt.show()
In [39]:
# Histogram of predicted state occupancy, one bin centred on each state index.
bin_edges = [state - 0.5 for state in range(n + 1)]
plt.hist(densehmm.predict(Y_true).reshape(-1), bin_edges)
plt.title("State frequency")
plt.show()
In [40]:
# State-by-hour-of-day frequency heatmap.
# NOTE(review): the count column from value_counts().reset_index() is named 0
# in older pandas but "count" in pandas >= 2 — confirm against the environment.
daily_counts = (
    pd.DataFrame({"states": states.reshape(-1), "hour": np.arange(states.shape[1]) % 24})
    .value_counts()
    .reset_index()
    .sort_values(["states", "hour"])
    # keyword arguments: positional pivot() args were removed in pandas 2.0
    .pivot(index="states", columns="hour", values=0)
    .fillna(0)
)
plt.matshow(daily_counts.values)
plt.ylabel("state")
plt.xlabel("hour")
plt.title("State frequency in 24 hours")
plt.show()
In [41]:
# Same heatmap split per weekday (series starts on a Tuesday).
# NOTE(review): count column named 0 in older pandas, "count" in pandas >= 2.
weekly = (
    pd.DataFrame({"states": states.reshape(-1), "hour": np.arange(states.shape[1]) % (24 * 7)})
    .value_counts()
    .reset_index()
    .sort_values(["states", "hour"])
    # keyword arguments: positional pivot() args were removed in pandas 2.0
    .pivot(index="states", columns="hour", values=0)
    .fillna(0)
    .values
)
# Renamed from `l`, which silently shadowed the embedding-size hyperparameter.
hours_per_day = weekly.shape[1] // 7
days = ['wtorek', 'środa', 'czwartek', 'piątek', 'sobota', 'niedziela', 'poniedziałek']
for i in range(7):
    plt.matshow(weekly[:, (hours_per_day * i):(hours_per_day * (i + 1))])
    plt.ylabel("state")
    plt.xlabel("hour")
    plt.title(f"State frequency in 24 hours: {days[i]}")
    plt.show()
In [ ]: